Sample description

Fitting probit model.

Load data

db <- haven::read_dta('C:/Users/emi.ABLE-22868/OneDrive/UWA PhD/bankFailure/data/failures-1997-2001-annual.dta') %>%
        dplyr::mutate(., 
          EXIT_DATE_Q = quarter(as_date(EXIT_DATE, origin=ymd('1960-01-01')), with_year=TRUE),
          EXIT_DATE_Y = year(as_date(EXIT_DATE, origin=ymd('1960-01-01'))),
          START_Q = quarter(as_date(FIRST_DATE, origin=ymd('1960-01-01')), with_year=TRUE),
          .after = IDENT) 

Define sample

samsSpecs <- list('s98.Priv.01.onT' = c(samStart=1998, samEnd=2001, stateOwn=FALSE),
                  's99.Priv.01.onT' = c(samStart=1999, samEnd=2001, stateOwn=FALSE))

The benchmark case I select all banks alive in 1997 and use their covariate to predict failure until time 2001 .

Here, I include banks alive on each t (instead of only banks alive on 1997q4).

Are state-owned banks included? 0

samStart contains the date at which the X covariates are considered (predictors), where samEnd is the last time failures are considered.

Estimation

State-owned banks are 4 (national) & 5 (provincial or municipal). Branches of foreign banks is 6, 5 is ‘cajas de credito’, 6 cia fcieras extranjero, 7 cia fcieras nacional.

r1 <- map(samsSpecs, function(samSpec) {
  # samSpec <- samsSpecs[[1]]
  sam <- db %>% 
          # Select observations of banks alive in 1997.4          
          #filter(START_Q <= 1997.4 & EXIT_DATE_Q > 1997.4) %>%
          # Only complete observations
          filter_all(all_vars(!is.na(.))) %>%
          # Only observations at this time
          filter(FECHA_A == samSpec['samStart']) %>%
          # ONly private banks
          filter(GRUPO_ID_UNI != 4 & GRUPO_ID_UNI !=5)
  # Alternative you may want to incorporate banks that were created after 1997.4. Then,
  # you consider all banks alive at samSpec['samStart'] quarter:
  #  sam <- db %>% 
  #            # Select observations of banks alive at samSpec['samStart']         
  #            filter(START_Q <= samSpec['samStart'] & EXIT_DATE_Q > samSpec['samStart']) %>%
  #            # Only complete observations
  #            filter_all(all_vars(!is.na(.))) %>%
  #            # Only observations at these time
  #            filter(FECHAdata == samSpec['samStart'])
  #
            
  
  # Bank covariates (X)
  varsList <- c('ActivoN', 'C8Est_w', 'CAR_IRR_3A6', 'P_ROA', 'P_DEP_ARS_RATE', 'P_LOANS_ARS_RATE_W', 'APRSpNF_RATE_W', 'APR_USD_RATE', 'APR_RATE_W')
  X <- sam %>% select(all_of(varsList))
  # 1: survival, 0: failure during the sample period
  Y <- if_else(sam$EXIT_DATE_Y <= samSpec['samEnd'], 0, 1)
  
  # Table
  desc <- tibble('N' = length(unique(sam$IDENT)),
              'T' = length(unique(sam$FECHA_A)),
              'NxT' = N*T,
              'Avg n per panel' = sam %>% 
                          select(FECHA_A, IDENT) %>% 
                          group_by(IDENT) %>% summarise(n = n()) %$% mean(.$n),
              'Avg IDENT' = mean(sam$IDENT),
              'SampleStart' = min(sam$FECHA_A), 
              'SampleEnd' = max(sam$FECHA_A),
              'Failure until' = samSpec['samEnd'],
              'SurvivalRate' = round(mean(Y), digits=2))
  
  descVars <- X %>% 
    summarise(across(all_of(varsList), list(
      'min' = ~min(.x, na.rm=TRUE),
      'median' = ~round(median(.x, na.rm=TRUE)),
      'mean' = ~mean(.x, na.rm=TRUE),
      'max' = ~round(max(.x, na.rm=TRUE)),
      'SD' = ~round(sd(.x, na.rm=TRUE)) )))
  
  # corr$r contains the correlation coefficients and corr$p the p-values (H0: r==0)
  corr <- Hmisc::rcorr(as.matrix(X), type='pearson')
  
  #map_dfr(samSpec['samStart']s, function(samSpec['samStart']) {
  #  
  #  
  #})
  #h <- rms::orm(Y ~ Xt$ActivoN + Xt$C8Est_w + Xt$CAR_IRR_3A6 + Xt$P_ROA + Xt$P_DEP_ARS_RATE + Xt$P_LOANS_ARS_RATE_W + Xt$APRSpNF_RATE_W + Xt$APR_USD_RATE + Xt$APR_RATE_W, family=probit)
  p <- glm(as.matrix(Y) ~ 
             X$ActivoN + X$C8Est_w + X$CAR_IRR_3A6 + X$P_ROA + X$P_DEP_ARS_RATE + X$P_LOANS_ARS_RATE_W + X$APRSpNF_RATE_W + X$APR_USD_RATE + X$APR_RATE_W, 
           family=binomial(link='logit'), x=TRUE)
  
  list('desc' = desc, 'descVars' = descVars, 'corr'=corr, 'p' = p)
})
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

Compute marginal effects

A \(1- \alpha\) confidence interval for \(\hat{\beta_k}\) is \(\hat{\beta_k} \pm z_{\frac{\alpha}{2}} \: \frac{\hat{\sigma_k}}{\sqrt{n}}\)

# OUtput is r2$s1997q4 containing a tibble with marginal effects for each variable
# Compute marginal effects
# Marginal probabilities
r2 <- map(r1, function(r) {
  # r = r1$s97q4.Priv.01q4
  pSummary <- summary.glm(r$p)
  
  pMarginals <- dnorm( r$p$x %*% r$p$coefficients )
  
  # Marginal effect of the average bank
  x_Bar <- apply(r$p$x, MARGIN=2, mean) # Kx1 vector
  pMarginalAvgBank <- dnorm( t(x_Bar) %*% r$p$coefficients)
  
  pmap_dfr(as_tibble( pSummary$coefficients, rownames='var'), function(var, Estimate, `Std. Error`, ...) { 
  # CAMERON TRIVEDIR p467. Avergae Marginal effect of variable j is the average (over observations) of pdf(X Beta) * \hat{\beta}_j
    tibble('samStart' = r$desc$SampleStart,
           'samEnd' = r$desc$SampleStart,
           'n' = NROW(r$p$y),
           'var' = var, 
           'avgEffectMean' = mean(pMarginals * Estimate),
           'avgEffectMin90' = avgEffectMean - 1.645 * (`Std. Error`)*(1/sqrt( n )), 
           'avgEffectMax90' = avgEffectMean + 1.645 * (`Std. Error`)*(1/sqrt( n )),
           'effectAvgBankMean' = pMarginalAvgBank * Estimate,
           'effectAvgBankMin90' = effectAvgBankMean - 1.645* (`Std. Error`)*(1/sqrt( n )),
           'effectAvgBankMax90' = effectAvgBankMean + 1.645*(`Std. Error`)*(1/sqrt( n )))
  }) 
  
}) 

Results

Descriptive statistics

t <- map2_dfr(r1, names(r1), function(r, samName) {
      r$desc }) %>% t(.) %>% 
      as_tibble(., rownames='row_names')
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
colnames(t) <- c('Feature', names(samsSpecs) )

t %>% DT::datatable(., rownames=FALSE) %>% 
  formatRound(columns=c(2:NCOL(t)))

Means

tMeans <- map_dfr(r1, function(r) {
      r$descVars %>% select(ends_with('_mean')) }) %>%
      t(.) %>% 
      as_tibble(., rownames='row_names')
colnames(tMeans) <- c('Variable', names(samsSpecs) )

tMeans %>%
  DT::datatable(., rownames=FALSE) %>% formatRound(columns=c(2:NCOL(tMeans)))

Std Dev

tSDs <- map_dfr(r1, function(r) {
      r$descVars %>% select(ends_with('_SD')) }) %>%
      t(.) %>% 
      as_tibble(., rownames='row_names')
colnames(tSDs) <- c('Variable', names(samsSpecs) )

tSDs %>%
  DT::datatable(., rownames=FALSE) %>% formatRound(columns=c(2:NCOL(tSDs)))

Correlation coefficients

walk(r1, function(r) {
  colnames(r$corr$P) <- str_c( colnames(r$corr$r), '_P') 
  corrData <- cbind(r$corr$r, r$corr$P) %>% 
              as_tibble(., rownames='Variable')

  corrTable <-  DT::datatable(corrData, rownames = FALSE, 
                    caption='Correlation table. Bold correlations with p-value < 0.15',
                    extensions = 'FixedColumns',
                      options = list(dom = 'Variable', scrollX = TRUE, fixedColumns = TRUE))

  # Add format for significant correlations (p-value < 0.15)
    walk(colnames(r$corr$r), function(colName) {
      #browser()
      corrTable <<- formatStyle(corrTable, 
                      colName, str_c(colName, '_P'),
                      fontWeight = styleInterval(c(0.15), c('bold', 'normal')))
      corrTable <<- formatStyle(corrTable, 
                      str_c(colName, '_P'),
                      visibility= 'hidden')
    })
    #browser()
    print(corrTable %>% formatRound(columns=c(2:NCOL(corrData))))
    #print('2')

})

Average marginal effects

# Extract data on marginal effects for each sample
mEffects <- map_dfr(r2, function(sam) {
            sam
            })

plot_avg_m_effect <- function(varStr) {
    dPlot <- filter(mEffects, var== !!str_c('X$',varStr)) %>%
            mutate(
            'avgEffectMin90' = avgEffectMin90*100,
            'avgEffectMean' = avgEffectMean*100,
            'avgEffectMax90' = avgEffectMax90*100,
            'bar_length_radio' = (avgEffectMax90 - avgEffectMin90)/2,
              'samStart_str' = str_c(samStart))
    pInteractive <- plot_ly(data=dPlot,
               x = ~samStart_str,
               y = ~avgEffectMean,
               type = 'scatter', mode = 'markers',
               error_y = ~list(array = dPlot$bar_length_radio,
                          color = '#000000')) %>%
               #name = ~Item,
               #color = ~ Item,
               #colors = c("dodgerblue3","deeppink3","lightgreen")),
            layout(
               title = 'Marginal effect of capital ratio on survival probability',
               xaxis = list(type = 'category', title = 'Quarter of predictor'),
               yaxis = list(title = 'Marginal effect x100'))
                            #range = c(0,7))
    pStatic <- ggplot(data = dPlot,
         mapping=aes(x=samStart, y=avgEffectMean)) +
        geom_hline(yintercept=0) +
        geom_pointrange(aes(ymin=avgEffectMin90, ymax=avgEffectMax90), colour='blue')
    
    list('interactive' = pInteractive, 'static' = pStatic)

}

Capital ratio (C8Est_w)

Plot marginal effect for capital ratio for different initial values of predictor variables.

plots <- plot_avg_m_effect('C8Est_w')

plots$interactive
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
plots$static

Non-perfoaming loans

plots <- plot_avg_m_effect('CAR_IRR_3A6')

plots$interactive
plots$static

Deposits interest rate (P_DEP_ARS_RATE)

Plot marginal effect for Deposits interest rate for different initial values of predictor variables.

plots <- plot_avg_m_effect('P_DEP_ARS_RATE')

plots$interactive
plots$static

Loans interest rate (P_LOANS_ARS_RATE_W)

Plot marginal effect for Deposits interest rate for different initial values of predictor variables.

plots <- plot_avg_m_effect('P_LOANS_ARS_RATE_W')

plots$interactive
plots$static

PUblic sector exposure through loans (APRSpNF_RATE_W)

Plot marginal effect for PUblic sector exposure through loans for different initial values of predictor variables.

plots <- plot_avg_m_effect('APRSpNF_RATE_W')

plots$interactive
plots$static

USD loans (APR_USD_RATE)

Plot marginal effect for APR_USD_RATE for different initial values of predictor variables.

plots <- plot_avg_m_effect('APR_USD_RATE')

plots$interactive
plots$static

Loans to Assets ratio (APR_RATE_W)

Plot marginal effect for APR_RATE_W for different initial values of predictor variables.

plots <- plot_avg_m_effect('APR_RATE_W')

plots$interactive
plots$static

Marginal effects of average bank

Capital ratio (C8Est_w)

Plot marginal effect for capital ratio for different initial values of predictor variables.

plot_m_effect_avg_bank <- function(varStr) {
  dPlot <- filter(mEffects, var == !!str_c('X$',varStr)) %>%
            mutate(
              'effectAvgBankMin90' = effectAvgBankMin90*100,
              'effectAvgBankMean' = effectAvgBankMean*100,
              'effectAvgBankMax90' = effectAvgBankMax90*100,
              'bar_length_radio' = (effectAvgBankMax90 - effectAvgBankMin90)/2,
              'samStart_str' = str_c(samStart))
    pInteractive <- plot_ly(data=dPlot,
                 x = ~samStart_str,
                 y = ~effectAvgBankMean,
                 type = 'scatter', mode = 'markers',
                 error_y = ~list(array = dPlot$bar_length_radio,
                            color = '#000000')) %>%
                 #name = ~Item,
                 #color = ~ Item,
                 #colors = c("dodgerblue3","deeppink3","lightgreen")),
              layout(
                 title = str_c('Marginal effect of ', varStr, ' on survival probability'),
                 xaxis = list(type = 'category', title = 'Quarter of predictor'),
                 yaxis = list(title = 'Marginal effect x100'))
                              #range = c(0,7))
    pStatic <- ggplot(data = dPlot,
                 mapping=aes(x=samStart, y=effectAvgBankMean)) +
                geom_hline(yintercept=0) +
                geom_pointrange(aes(ymin=effectAvgBankMin90, ymax=effectAvgBankMax90), colour='blue')
    
    list('interactive' = pInteractive, 'static' = pStatic)

}
plots <- plot_m_effect_avg_bank('C8Est_w')

plots$interactive
plots$static

Non-perfoaming loans

plots <- plot_m_effect_avg_bank('CAR_IRR_3A6')

plots$interactive
plots$static

Deposits interest rate (P_DEP_ARS_RATE)

Plot marginal effect for Deposits interest rate for different initial values of predictor variables.

plots <- plot_m_effect_avg_bank('P_DEP_ARS_RATE')

plots$interactive
plots$static

Loans interest rate (P_LOANS_ARS_RATE_W)

Plot marginal effect for Deposits interest rate for different initial values of predictor variables.

plots <- plot_m_effect_avg_bank('P_LOANS_ARS_RATE_W')

plots$interactive
plots$static

PUblic sector exposure through loans (APRSpNF_RATE_W)

Plot marginal effect for PUblic sector exposure through loans for different initial values of predictor variables.

plots <- plot_m_effect_avg_bank('APRSpNF_RATE_W')

plots$interactive
plots$static

USD loans (APR_USD_RATE)

Plot marginal effect for APR_USD_RATE for different initial values of predictor variables.

plots <- plot_m_effect_avg_bank('APR_USD_RATE')

plots$interactive
plots$static

Loans to Assets ratio (APR_RATE_W)

Plot marginal effect for APR_RATE_W for different initial values of predictor variables.

plots <- plot_m_effect_avg_bank('APR_RATE_W')

plots$interactive
plots$static